import pandas as pd
import seaborn as sns
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
For this exercise, we have written the following code to load the stock dataset built into plotly express.
# Load the demo stock dataset bundled with plotly express into a DataFrame.
stocks = px.data.stocks()
# Left disabled on purpose: 'date' stays a regular column, which the plotting
# cells below read as stocks.date / stocks['date'].
#stocks.set_index('date', inplace = True)
# Preview the first rows to check the available columns.
stocks.head()
| | date | GOOG | AAPL | AMZN | FB | NFLX | MSFT |
|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 2018-01-08 | 1.018172 | 1.011943 | 1.061881 | 0.959968 | 1.053526 | 1.015988 |
| 2 | 2018-01-15 | 1.032008 | 1.019771 | 1.053240 | 0.970243 | 1.049860 | 1.020524 |
| 3 | 2018-01-22 | 1.066783 | 0.980057 | 1.140676 | 1.016858 | 1.307681 | 1.066561 |
| 4 | 2018-01-29 | 1.008773 | 0.917143 | 1.163374 | 1.018357 | 1.273537 | 1.040708 |
Select a stock and create a suitable plot for it. Make sure the plot is readable and shows the relevant information, such as dates and values.
# YOUR CODE HERE
# Line plot of one stock (GOOG) against the date column.
x = stocks['date']
y = stocks['GOOG']

# Figure defaults are set through rcParams, so they also apply to every
# figure created after this cell.
plt.rcParams.update({
    "figure.figsize": [12.00, 12.00],
    "figure.autolayout": True,
})

fig, ax = plt.subplots()
ax.plot(x, y)

# Title and axis labels in one call.
ax.set(title='Google stock', xlabel='Date', ylabel='stock value')

# Label only every 10th date so the tick labels do not overlap.
ticks = list(stocks['date'])
plt.xticks(ticks[::10], rotation='horizontal')
plt.show()
You've already plotted data for one stock. It is also possible to plot several stocks together to support comparison.
To distinguish the different lines, customise line styles, markers and colors, and add a legend to the plot.
# YOUR CODE HERE
# Draw all six stocks on one set of axes so they can be compared directly.
x = stocks.date

# Figure defaults are set through rcParams, so they also apply to every
# figure created after this cell.
plt.rcParams.update({
    "figure.figsize": [12.00, 12.00],
    "figure.autolayout": True,
})

fig, ax = plt.subplots()

# One line per ticker; the column name doubles as the legend label.
for symbol in ('GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT'):
    ax.plot(x, stocks[symbol], label=symbol)

# Title and axis labels in one call.
ax.set(title='Stocks', xlabel='Date', ylabel='stock value')

# loc=0 lets matplotlib pick the legend position.
ax.legend(loc=0)

# Label only every 10th date so the tick labels do not overlap.
ticks = list(stocks['date'])
plt.xticks(ticks[::10], rotation='horizontal')
plt.show()
First, load the tips dataset
tips = sns.load_dataset('tips')
# Derived column: tip relative to the bill. The value is stored as a fraction
# (tip / total_bill), i.e. it is not multiplied by 100.
tips['tip_percentage'] = tips['tip'] / tips['total_bill']
tips.head()
| | total_bill | tip | sex | smoker | day | time | size | tip_percentage |
|---|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 |
Let's explore this dataset. Pose a question and create a plot that helps answer it.
Some possible questions:
# YOUR CODE HERE
# Question: do male and female customers differ in how much they tip?
# Figure defaults are set through rcParams, so they also apply to every
# figure created after this cell.
plt.rcParams.update({
    "figure.figsize": [12.00, 5],
    "figure.autolayout": True,
})
# Alternative view kept for reference: tip vs. total bill, coloured by sex.
#sns.scatterplot(x='total_bill', y='tip', hue = 'sex', data=tips)
# One box per sex showing the distribution of the tip/bill ratio.
sns.boxplot(data=tips, x='sex', y='tip_percentage')
plt.show()
Redo the above exercises (challenges 2 & 3) with plotly express. Create diagrams which you can interact with.
Hints:
# Reload the demo stock dataset bundled with plotly express.
stocks = px.data.stocks()
# Left disabled on purpose: 'date' stays a regular column, which the plotly
# line chart below references as x='date'.
#stocks.set_index('date', inplace = True)
# Preview the first rows to check the available columns.
stocks.head()
| | date | GOOG | AAPL | AMZN | FB | NFLX | MSFT |
|---|---|---|---|---|---|---|---|
| 0 | 2018-01-01 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 2018-01-08 | 1.018172 | 1.011943 | 1.061881 | 0.959968 | 1.053526 | 1.015988 |
| 2 | 2018-01-15 | 1.032008 | 1.019771 | 1.053240 | 0.970243 | 1.049860 | 1.020524 |
| 3 | 2018-01-22 | 1.066783 | 0.980057 | 1.140676 | 1.016858 | 1.307681 | 1.066561 |
| 4 | 2018-01-29 | 1.008773 | 0.917143 | 1.163374 | 1.018357 | 1.273537 | 1.040708 |
# YOUR CODE HERE
# Interactive version of the multi-stock chart: every column after 'date'
# becomes its own line, with markers on the data points.
ticker_columns = stocks.columns[1:]
fig = px.line(stocks, x='date', y=ticker_columns, markers=True)
# Give the legend a title.
fig.update_layout(legend={"title": "Stocks"})
fig.show()
# YOUR CODE HERE
# Interactive version of the seaborn box plot: tip/bill ratio per sex.
df = tips
fig = px.box(data_frame=df, x='sex', y='tip_percentage')
fig.show()
Recreate the barplot below that shows the population of different continents for the year 2007.
Hints:
# Load the gapminder demo dataset bundled with plotly express.
df = px.data.gapminder()
# Preview the first rows to check the available columns.
df.head()
| | country | continent | year | lifeExp | pop | gdpPercap | iso_alpha | iso_num |
|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 | AFG | 4 |
| 1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 | AFG | 4 |
| 2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 | AFG | 4 |
| 3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 | AFG | 4 |
| 4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 | AFG | 4 |
# YOUR CODE HERE
# Horizontal bar chart of the total population per continent in 2007.
df_2007 = df.query('year==2007')
# Sum only the 'pop' column. Aggregating the whole frame would also add up
# year, lifeExp, gdpPercap and iso_num (meaningless totals), and how the
# string columns are handled depends on the pandas version.
# as_index=False keeps 'continent' as a regular column for px.bar.
df_2007_new = df_2007.groupby('continent', as_index=False)['pop'].sum()
fig = px.bar(
    df_2007_new,
    y="continent",
    x="pop",
    color="continent",
    orientation="h",
    hover_name='pop',
    text='pop',  # print the population value on each bar
    color_discrete_map={
        "Europe": "red",
        "Asia": "green",
        "Americas": "blue",
        "Oceania": "goldenrod",
        "Africa": "magenta",
    },
    # Fixed bar order instead of the alphabetical order produced by groupby.
    category_orders={"continent": ["Asia", "Africa", "Americas", "Europe", "Oceania"]},
    title="Question 5",
)
fig.show()